Background

For this project, I decided to recreate Hans Rosling’s animated bubble chart of life expectancy against GDP per capita, with bubbles sized by population. The graph can be found here.

Since the video did not share the dataset, I downloaded another dataset from Our World in Data.

Data Overview and Wrangling

Although the data start in 1870, I decided to use only the years from 1950 onward, because most countries are missing population and GDP values for earlier years.

# Load the Our World in Data extract, normalise the column names to
# snake_case, drop rows with any NA, and keep country-level rows (the "World"
# aggregate is excluded) from 1950 onward.
le <- read.csv("data/life_expectancy_data.csv") %>%
  janitor::clean_names() %>%
  na.omit() %>%
  filter(entity != "World" & year >= 1950)

# Preview the first rows of the cleaned data.
head(le)
##        entity code year life_expectancy gdp_per_capita x145446_annotations
## 1 Afghanistan  AFG 1950          27.638           1156                    
## 2 Afghanistan  AFG 1951          27.878           1170                    
## 3 Afghanistan  AFG 1952          28.361           1189                    
## 4 Afghanistan  AFG 1953          28.852           1240                    
## 5 Afghanistan  AFG 1954          29.350           1245                    
## 6 Afghanistan  AFG 1955          29.854           1246                    
##   population_historical_estimates continent
## 1                         7752117          
## 2                         7840151          
## 3                         7935996          
## 4                         8039684          
## 5                         8151316          
## 6                         8270992

There are 166 countries in the dataset, but its continent column is empty, so the data do not say which continent each country belongs to.

I used this website as a reference for the continents and manually labeled each country.

# Number of distinct entities (countries) left after cleaning.
length(unique(le$entity))
## [1] 166
# Label every country with its continent. The `continent` column that comes
# with the data appears to be blank for these rows, so the mapping is
# hard-coded and overwrites that column.
#
# case_when() has no fall-through branch here, so any entity that is not named
# in one of the lists below ends up with continent = NA. After running, check
#   le %>% filter(is.na(continent)) %>% distinct(entity)
# and add whatever it reports to the right list.
le <- le %>%
  mutate(continent = case_when(
    entity %in% c(
      "Algeria", "Angola", "Benin", "Botswana", "Burkina Faso", "Burundi",
      "Cameroon", "Cape Verde", "Central African Republic", "Chad",
      "Comoros", "Congo", "Cote d'Ivoire", "Democratic Republic of Congo",
      "Djibouti", "Egypt", "Equatorial Guinea", "Eswatini", "Ethiopia",
      "Gabon", "Gambia", "Ghana", "Guinea", "Guinea-Bissau", "Kenya",
      "Lesotho", "Liberia", "Libya", "Madagascar", "Malawi", "Mali",
      "Mauritania", "Mauritius", "Morocco", "Mozambique", "Namibia",
      "Niger", "Nigeria", "Rwanda", "Sao Tome and Principe", "Senegal",
      "Seychelles", "Sierra Leone", "South Africa", "Sudan", "Tanzania",
      "Togo", "Tunisia", "Uganda", "Zambia", "Zimbabwe"
    ) ~ "Africa",

    entity %in% c(
      "Barbados", "Canada", "Costa Rica", "Cuba", "Dominica",
      "Dominican Republic", "El Salvador", "Guatemala", "Haiti",
      "Honduras", "Jamaica", "Mexico", "Nicaragua", "Panama",
      "Puerto Rico", "Saint Lucia", "Trinidad and Tobago", "United States"
    ) ~ "North America",

    entity %in% c(
      "Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Ecuador",
      "Paraguay", "Peru", "Uruguay", "Venezuela"
    ) ~ "South America",

    entity %in% c(
      "Afghanistan", "Azerbaijan", "Bahrain", "Bangladesh", "Cambodia",
      "China", "Cyprus", "Georgia", "Hong Kong", "India", "Indonesia",
      "Iran", "Iraq", "Israel", "Japan", "Jordan", "Kazakhstan", "Kuwait",
      "Kyrgyzstan", "Laos", "Lebanon", "Malaysia", "Mongolia", "Myanmar",
      "Nepal", "North Korea", "Oman", "Pakistan", "Palestine",
      "Philippines", "Qatar", "Saudi Arabia", "Singapore", "South Korea",
      "Sri Lanka", "Syria", "Taiwan", "Tajikistan", "Thailand", "Turkey",
      "Turkmenistan", "United Arab Emirates", "Uzbekistan", "Vietnam",
      "Yemen"
    ) ~ "Asia",

    # Moldova belongs here, not under Asia.
    # "Russia" is assumed to be the dataset's spelling -- verify; the entry
    # is harmless if the name does not occur in the data.
    entity %in% c(
      "Albania", "Armenia", "Austria", "Belarus", "Belgium",
      "Bosnia and Herzegovina", "Bulgaria", "Croatia", "Czechia",
      "Denmark", "Estonia", "Finland", "France", "Germany", "Greece",
      "Hungary", "Iceland", "Ireland", "Italy", "Latvia", "Lithuania",
      "Luxembourg", "Malta", "Moldova", "Montenegro", "Netherlands",
      "North Macedonia", "Norway", "Poland", "Portugal", "Romania",
      "Russia", "Serbia", "Slovakia", "Slovenia", "Spain", "Sweden",
      "Switzerland", "Ukraine", "United Kingdom"
    ) ~ "Europe",

    entity %in% c("Australia", "New Zealand") ~ "Oceania"
  ))

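In hindsight, I should have built a country-to-continent lookup table and attached it with left_join, which would have been more efficient than hard-coding these lists…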

Data Visualization

I used plotly and crosstalk to make a visualization.

library(crosstalk)
# Wrap the data in a crosstalk SharedData object so the filter widget and the
# plot operate on the same rows.
shared_le <- SharedData$new(le)

# Lay out a continent filter beside the animated bubble chart.
# In the chart, `frame = year` gives one animation frame per year and
# `ids = entity` identifies each country's bubble across frames.
bscols(
  widths = c(2, NA),
  list(
    filter_select("continent", "Continent", shared_le, ~continent)
  ),
  ggplotly(
    shared_le %>%
      ggplot(aes(gdp_per_capita, life_expectancy)) +
      geom_point(
        aes(
          size = population_historical_estimates,
          frame = year,
          ids = entity,
          color = continent
        )
      ) +
      scale_x_log10() +
      # Title each legend after the variable it encodes: bubble size is
      # population, colour is continent.
      labs(
        x = "GDP per Capita",
        y = "Life Expectancy",
        size = "Population",
        color = "Continent"
      )
  )
)
## Warning: Ignoring unknown aesthetics: frame, ids
## Warning in p$x$data[firstFrame] <- p$x$frames[[1]]$data: number of items to
## replace is not a multiple of replacement length

Show countries in North America

# Restrict the data to North American countries and wrap it in its own
# SharedData object so the country filter only affects this chart.
le_na <- le %>% filter(continent == "North America")
shared_le_na <- SharedData$new(le_na)

# Lay out a country filter beside the animated bubble chart
# (`frame = year` gives one animation frame per year).
bscols(
  widths = c(2, NA),
  list(
    filter_select("entity", "Country", shared_le_na, ~entity)
  ),
  ggplotly(
    shared_le_na %>%
      ggplot(aes(gdp_per_capita, life_expectancy)) +
      geom_point(
        aes(
          size = population_historical_estimates,
          frame = year,
          color = entity
        )
      ) +
      scale_x_log10() +
      # Title each legend after the variable it encodes: bubble size is
      # population, colour is the country.
      labs(
        x = "GDP per Capita",
        y = "Life Expectancy",
        size = "Population",
        color = "Country"
      )
  )
)
## Warning: Ignoring unknown aesthetics: frame

Reflection